home *** CD-ROM | disk | FTP | other *** search
- #
- # emailAddress.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- from consts import *
- from GlobalObjects import *
-
-
def validateEmailAddress(addr):
    """Validate a single email address string.

    addr is matched against globalObjects.emailAddrPat (presumably a
    compiled regular expression defined in GlobalObjects -- confirm there).
    Two forms are recognized:

    * a plain email address (address in group 1, domain in group 2);
    * the form "Name <id@domain>" (address in group 3, domain in group 4).

    Returns a tuple (email address, domain), both in lower case and with
    anything matched by globalObjects.emailAddrCommentPat removed.  Returns
    (False, False) if addr does not match or no domain was captured.
    """
    def clean(fragment):
        # rid of comments, then normalize to lower case
        return globalObjects.emailAddrCommentPat.sub('', fragment).lower()

    matched = globalObjects.emailAddrPat.match(addr)
    if not matched:
        return False, False

    # Try the plain form first, then the "Name <id@domain>" form; each pair
    # is (address group, domain group).
    for addrGroup, domainGroup in ((1, 2), (3, 4)):
        domainPart = matched.group(domainGroup)
        if domainPart:
            return clean(matched.group(addrGroup)), clean(domainPart)

    return False, False
-
def extractEmailAddresses(s):
    """Extract email addresses and domains from string s.

    s is stripped of surrounding whitespace and scanned with
    globalObjects.emailAddrPat.finditer(); consecutive matches must cover
    the stripped string without gaps.

    1. If successful returns a list of tuples (text, email address, domain),
       where text is the original text of the address (could be in a
       different encoding and/or could include names), stripped and with one
       trailing comma removed; the address and domain are lower-cased with
       comments removed.  An empty (or all-whitespace) s gives an empty list.
    2. If failed returns the string that was skipped (the error is usually
       around that text).  When nothing matches at all, the whole stripped
       string is returned.
    """
    def clean(fragment):
        # rid of comments, then normalize to lower case
        return globalObjects.emailAddrCommentPat.sub('', fragment).lower()

    s = s.strip()
    total = len(s)
    if total == 0:
        return []

    found = []
    # End offset of the previous match; starting at 0 makes "the first match
    # must begin at offset 0" the same check as "no gap between matches".
    lastEnd = 0
    for mo in globalObjects.emailAddrPat.finditer(s):
        begin = mo.start(0)
        if begin != lastEnd:
            # something is skipped
            return s[lastEnd:begin]
        lastEnd = mo.end(0)

        text = mo.group(0).strip()
        if text[-1] == ',':
            text = text[:-1]

        domain = mo.group(2)
        if domain:
            # first possibility: plain email address
            entry = (text, clean(mo.group(1)), clean(domain))
        else:
            # second possibility: in the form of "Name <id@domain>"
            entry = (text, clean(mo.group(3)), clean(mo.group(4)))
        found.append(entry)

    if lastEnd != total:
        # something is skipped after the last match (or nothing matched,
        # in which case this is the whole string)
        return s[lastEnd:total]

    return found
-
-
if __name__ == '__main__':
    # Ad-hoc demonstration: print the result of each function for a handful
    # of sample inputs when this file is run as a script.
    validateSamples = (
        'foo@bar.com',
        'Foo <foo@bar.com>',
        '"Foo" <foo@bar.com>',
        'foo@',
        '@bar.com',
        '"Foo" <foo@>',
        '\'"Kent, Clark\'" <superman@krypton.net>',
        'someone@foo.com<someone@foo.com>',
    )
    for sample in validateSamples:
        print(validateEmailAddress(sample))

    extractSamples = (
        ' "Foo, Bar" <foo@bar.com>, Foo1 <FOO1@bar.com>, foo2@bar.com',
        ' "Foo, Bar" <foo@bar.com>, Foo1 FOO1@bar.com>, foo2@bar.com',
        ' "Foo, Bar" <foo@bar.com>, Foo1 <FOO1@bar.com>, foo2',
        ' "Foo, Bar" <foo@bar.com>, <foo1@>, foo2@bar.com',
    )
    for sample in extractSamples:
        print(extractEmailAddresses(sample))
-